package com.xqy.wexmini.utils;

import com.xqy.wexmini.domain.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;

/**
 * Static helpers that scrape news search results from Baidu with jsoup.
 */
public class Newsutils {
    /** User-Agent header sent with every request issued by this class. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/4.0;)";

    /** Connect/read timeout, in milliseconds, applied to every request. */
    private static final int TIMEOUT_MILLIS = 5000;

    /** Multiplier turning the page index into the value of the {@code pn} query parameter. */
    private static final int PN_STEP = 10;

    /**
     * Runs a Baidu news search and extracts one {@link Content} per result entry.
     *
     * <p>For each entry the first anchor supplies the URL and title; the time and summary
     * are read from the first elements carrying the corresponding CSS classes. Entries whose
     * URL contains {@code "baidu"} are dropped.
     *
     * @param keywords search term; URL-encoded (UTF-8) before being placed in the query string.
     *                 {@code null} is treated as an empty term.
     * @param page     page index, multiplied by 10 to form the {@code pn} parameter;
     *                 {@code null} is treated as {@code 0}.
     * @return the extracted results, possibly empty (including when the response page has no
     *         {@code content_left} element); never {@code null}
     * @throws IOException if the request fails or the encoding is unsupported
     */
    public static List<Content> parseJD(String keywords, Integer page) throws IOException {
        // Encode the term so characters such as '&', '#', '+' or spaces cannot break the query.
        String encodedKeywords = URLEncoder.encode(keywords == null ? "" : keywords, "UTF-8");
        int pn = (page == null ? 0 : page) * PN_STEP;
        String url = "https://www.baidu.com/s?rtt=1&bsst=1&cl=2&tn=news&rsv_dl=ns_pc&word="
                + encodedKeywords
                + "&x_bfe_rqs=0320800000000000000022&x_bfe_tjscore=0.080000&tngroupname=organic_news&newVideo=12&goods_entry_switch=1&pn="
                + pn;

        Document document = fetch(url);
        List<Content> newsList = new ArrayList<>();

        // HTTP errors are ignored by fetch(), so the page may lack the result container.
        Element resultColumn = document.getElementById("content_left");
        if (resultColumn == null) {
            return newsList;
        }

        // NOTE(review): a space-separated name passed to getElementsByClass is presumably matched
        // against the full class attribute rather than each class separately - confirm against
        // the jsoup version in use.
        Elements entries = resultColumn.getElementsByClass("result-op c-container xpath-log new-pmd");
        for (Element entry : entries) {
            Elements firstAnchor = entry.getElementsByTag("a").eq(0);
            String link = firstAnchor.attr("href");
            if (link.contains("baidu")) {
                // Skip results that point back at Baidu itself.
                continue;
            }

            Content content = new Content();
            content.setTime(entry.getElementsByClass("c-color-gray2 c-font-normal c-gap-right-xsmall").eq(0).text());
            content.setTitle(firstAnchor.text());
            content.setUrl(link);
            content.setContent(entry.getElementsByClass("c-font-normal c-color-text").eq(0).text());
            newsList.add(content);
        }
        return newsList;
    }

    /**
     * Fetches the given URL and returns the inner HTML of every {@code div} element in it.
     *
     * <p>The result is whatever {@code Elements.html()} yields for the set of all {@code div}
     * elements, so the markup of nested {@code div}s may appear more than once.
     *
     * @param url absolute URL of the page to fetch
     * @return combined inner HTML of all {@code div} elements in the page
     * @throws IOException if the request fails
     */
    public static String parseJD1(String url) throws IOException {
        Document document = fetch(url);
        Elements div = document.getElementsByTag("div");
        return div.html();
    }

    /** Issues a GET with the shared connection settings and returns the parsed document. */
    private static Document fetch(String url) throws IOException {
        return Jsoup.connect(url)
                .ignoreContentType(true)
                .ignoreHttpErrors(true)
                .followRedirects(true)
                .timeout(TIMEOUT_MILLIS)
                .userAgent(USER_AGENT)
                .get();
    }
}
